From 7238b7fbf4d12e2d9a715840e530ff4bba30e2e5 Mon Sep 17 00:00:00 2001 From: "gm281@boulderdash.cl.cam.ac.uk" Date: Wed, 28 Jul 2004 18:02:38 +0000 Subject: [PATCH] bitkeeper revision 1.1108.34.1 (4107ea3eSSzRurUHPnjryAZ3X8VXbg) New locking scheme for schedulers --- xen/common/domain.c | 4 +- xen/common/sched_bvt.c | 150 ++++++++++++++++--------- xen/common/sched_fair_bvt.c | 217 +++++++++++++++++++++--------------- xen/common/sched_rrobin.c | 47 ++++++-- xen/common/schedule.c | 24 ++-- xen/include/xen/sched.h | 1 + 6 files changed, 278 insertions(+), 165 deletions(-) diff --git a/xen/common/domain.c b/xen/common/domain.c index 111210c5e7..3a1bfc2e7c 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -40,7 +40,9 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu) d->domain = dom_id; d->processor = cpu; d->create_time = NOW(); - + /* Initialise the state_lock */ + spin_lock_init(&d->state_lock); + memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread)); if ( d->domain != IDLE_DOMAIN_ID ) diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c index e4498359e6..c03f2a78d3 100644 --- a/xen/common/sched_bvt.c +++ b/xen/common/sched_bvt.c @@ -45,6 +45,7 @@ struct bvt_dom_info struct bvt_cpu_info { + spinlock_t run_lock; /* protects runqueue */ struct list_head runqueue; /* runqueue for given processor */ unsigned long svt; /* XXX check this is unsigned long! */ }; @@ -148,15 +149,86 @@ int bvt_init_idle_task(struct domain *p) bvt_add_task(p); - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); + set_bit(DF_RUNNING, &p->flags); if ( !__task_on_runqueue(RUNLIST(p)) ) __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor)); - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags); + + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); return 0; } +void bvt_wake(struct domain *d) +{ + unsigned long flags; + struct bvt_dom_info *inf = BVT_INFO(d); + struct domain *curr; + s_time_t now, min_time; + int cpu = d->processor; + + /* The runqueue accesses must be protected */ + spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags); + + /* If on the runqueue already then someone has done the wakeup work. */ + if ( unlikely(__task_on_runqueue(RUNLIST(d))) ) + { + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + return; + } + + __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor)); + + now = NOW(); + + /* Set the BVT parameters. */ + if ( inf->avt < CPU_SVT(cpu) ) + inf->avt = CPU_SVT(cpu); + + /* Deal with warping here. */ + inf->warpback = 1; + inf->warped = now; + __calc_evt(inf); + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + + /* Access to schedule_data protected by schedule_lock */ + spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + + curr = schedule_data[cpu].curr; + + /* Currently-running domain should run at least for ctx_allow. */ + min_time = curr->lastschd + curr->min_slice; + + spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); + + if ( is_idle_task(curr) || (min_time <= now) ) + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) ) + mod_ac_timer(&schedule_data[cpu].s_timer, min_time); + +} + + +static void bvt_sleep(struct domain *d) +{ + unsigned long flags; + + if ( test_bit(DF_RUNNING, &d->flags) ) + cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); + else + { + /* The runqueue accesses must be protected */ + spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags); + + + if ( __task_on_runqueue(RUNLIST(d)) ) + __del_from_runqueue(RUNLIST(d)); + + spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags); + } +} + /** * bvt_free_task - free BVT private structures for a task * @p: task @@ -218,7 +290,7 @@ int bvt_adjdom(struct domain *p, if ( mcu_adv == 0 ) return -EINVAL; - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); inf->mcu_advance = mcu_adv; inf->warp = warp; inf->warpl = warpl; @@ -229,18 +301,18 @@ int bvt_adjdom(struct domain *p, p->domain, inf->mcu_advance, inf->warp, inf->warpl, inf->warpu ); - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags); + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); } else if ( cmd->direction == SCHED_INFO_GET ) { struct bvt_dom_info *inf = BVT_INFO(p); - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); params->mcu_adv = inf->mcu_advance; params->warp = inf->warp; params->warpl = inf->warpl; params->warpu = inf->warpu; - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags); + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); } return 0; @@ -256,6 +328,7 @@ int bvt_adjdom(struct domain *p, */ static task_slice_t bvt_do_schedule(s_time_t now) { + unsigned long flags; struct domain *prev = current, *next = NULL, *next_prime, *p; struct list_head *tmp; int cpu = prev->processor; @@ -269,8 +342,12 @@ static task_slice_t bvt_do_schedule(s_time_t now) *next_prime_inf = NULL; task_slice_t ret; + ASSERT(prev->sched_priv != NULL); ASSERT(prev_inf != NULL); + spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags); + + ASSERT(__task_on_runqueue(RUNLIST(prev))); if ( likely(!is_idle_task(prev)) ) { @@ -329,7 +406,9 @@ static task_slice_t bvt_do_schedule(s_time_t now) if ( p_inf->avt < min_avt ) min_avt = p_inf->avt; } - + + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + /* Extract the domain pointers from the dom infos */ next = next_inf->domain; next_prime = next_prime_inf->domain; @@ -341,8 +420,10 @@ static task_slice_t bvt_do_schedule(s_time_t now) /* check for virtual time overrun on this cpu */ if ( CPU_SVT(cpu) >= 0xf0000000 ) { - u_long t_flags; + u_long t_flags; + write_lock_irqsave(&tasklist_lock, t_flags); + for_each_domain ( p ) { if ( p->processor == cpu ) @@ -352,7 +433,9 @@ static task_slice_t bvt_do_schedule(s_time_t now) p_inf->avt -= 0xe0000000; } } + write_unlock_irqrestore(&tasklist_lock, t_flags); + CPU_SVT(cpu) -= 0xe0000000; } @@ -411,7 +494,7 @@ static void bvt_dump_cpu_state(int i) struct bvt_dom_info *d_inf; struct domain *d; - spin_lock_irqsave(&schedule_data[i].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags); printk("svt=0x%08lX ", CPU_SVT(i)); queue = RUNQUEUE(i); @@ -430,7 +513,7 @@ static void bvt_dump_cpu_state(int i) (unsigned long)list, (unsigned long)list->next, (unsigned long)list->prev); } - spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags); + spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags); } /* We use cache to create the bvt_dom_infos @@ -452,14 +535,16 @@ int bvt_init_scheduler() for ( i = 0; i < NR_CPUS; i++ ) { schedule_data[i].sched_priv = xmalloc(sizeof(struct bvt_cpu_info)); - INIT_LIST_HEAD(RUNQUEUE(i)); - + if ( schedule_data[i].sched_priv == NULL ) { printk("Failed to allocate BVT scheduler per-CPU memory!\n"); return -1; } + INIT_LIST_HEAD(RUNQUEUE(i)); + spin_lock_init(&CPU_INFO(i)->run_lock); + CPU_SVT(i) = 0; /* XXX do I really need to do this? */ } @@ -476,48 +561,7 @@ int bvt_init_scheduler() return 0; } -static void bvt_sleep(struct domain *d) -{ - if ( test_bit(DF_RUNNING, &d->flags) ) - cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); - else if ( __task_on_runqueue(RUNLIST(d)) ) - __del_from_runqueue(RUNLIST(d)); -} -void bvt_wake(struct domain *d) -{ - struct bvt_dom_info *inf = BVT_INFO(d); - struct domain *curr; - s_time_t now, min_time; - int cpu = d->processor; - - /* If on the runqueue already then someone has done the wakeup work. */ - if ( unlikely(__task_on_runqueue(RUNLIST(d))) ) - return; - - __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor)); - - now = NOW(); - - /* Set the BVT parameters. */ - if ( inf->avt < CPU_SVT(cpu) ) - inf->avt = CPU_SVT(cpu); - - /* Deal with warping here. */ - inf->warpback = 1; - inf->warped = now; - __calc_evt(inf); - - curr = schedule_data[cpu].curr; - - /* Currently-running domain should run at least for ctx_allow. */ - min_time = curr->lastschd + curr->min_slice; - - if ( is_idle_task(curr) || (min_time <= now) ) - cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); - else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) ) - mod_ac_timer(&schedule_data[cpu].s_timer, min_time); -} struct scheduler sched_bvt_def = { .name = "Borrowed Virtual Time", diff --git a/xen/common/sched_fair_bvt.c b/xen/common/sched_fair_bvt.c index a8a60bafd1..8659ab1dae 100644 --- a/xen/common/sched_fair_bvt.c +++ b/xen/common/sched_fair_bvt.c @@ -52,6 +52,7 @@ struct fbvt_dom_info struct fbvt_cpu_info { + spinlock_t run_lock; /* protects runqueue */ struct list_head runqueue; /* runqueue for this CPU */ unsigned long svt; /* XXX check this is unsigned long! */ u32 vtb; /* virtual time bonus */ @@ -160,15 +161,122 @@ int fbvt_init_idle_task(struct domain *p) if(fbvt_alloc_task(p) < 0) return -1; fbvt_add_task(p); - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); set_bit(DF_RUNNING, &p->flags); if ( !__task_on_runqueue(RUNLIST(p)) ) __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor)); - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags); + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); return 0; } +static void fbvt_wake(struct domain *d) +{ + unsigned long flags; + struct fbvt_dom_info *inf = FBVT_INFO(d); + struct domain *curr; + s_time_t now, min_time; + int cpu = d->processor; + s32 io_warp; + + /* The runqueue accesses must be protected */ + spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags); + + /* If on the runqueue already then someone has done the wakeup work. */ + if ( unlikely(__task_on_runqueue(RUNLIST(d))) ) + { + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + return; + } + + __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu)); + + now = NOW(); + +#if 0 + /* + * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing + * to do, in light of the stuff that fbvt_wake_up() does. + * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test + * 'inf->avt < CPU_SVT(cpu)' redundant! + */ + if ( d->domain == IDLE_DOMAIN_ID ) + { + inf->avt = inf->evt = ~0U; + } + else + { + /* Set avt to system virtual time. */ + inf->avt = CPU_SVT(cpu); + /* Set some default values here. */ + LAST_VTB(cpu) = 0; + __calc_evt(inf); + } +#endif + + /* Set the BVT parameters. */ + if ( inf->avt < CPU_SVT(cpu) ) + { + /* + * We want IO bound processes to gain dispatch precedence. It is + * especially for device driver domains. Therefore AVT + * not be updated to SVT but to a value marginally smaller. + * Since frequently sleeping domains have high time_slept + * values, the virtual time can be determined as: + * SVT - const * TIME_SLEPT + */ + io_warp = (int)(0.5 * inf->time_slept); + if ( io_warp > 1000 ) + io_warp = 1000; + + ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp); + inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp; + inf->avt = CPU_SVT(cpu) - io_warp; + } + + /* Deal with warping here. */ + inf->warpback = 1; + inf->warped = now; + __calc_evt(inf); + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + + /* Access to schedule_data protected by schedule_lock */ + spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + + + curr = schedule_data[cpu].curr; + + /* Currently-running domain should run at least for ctx_allow. */ + min_time = curr->lastschd + curr->min_slice; + + spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); + + if ( is_idle_task(curr) || (min_time <= now) ) + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) ) + mod_ac_timer(&schedule_data[cpu].s_timer, min_time); +} + + +static void fbvt_sleep(struct domain *d) +{ + unsigned long flags; + + + if ( test_bit(DF_RUNNING, &d->flags) ) + cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); + else + { + /* The runqueue accesses must be protected */ + spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags); + + if ( __task_on_runqueue(RUNLIST(d)) ) + __del_from_runqueue(RUNLIST(d)); + + spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags); + } +} + /** * fbvt_free_task - free FBVT private structures for a task @@ -232,7 +340,7 @@ int fbvt_adjdom(struct domain *p, if ( mcu_adv == 0 ) return -EINVAL; - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); inf->mcu_advance = mcu_adv; inf->warp = warp; inf->warpl = warpl; @@ -243,20 +351,18 @@ int fbvt_adjdom(struct domain *p, p->domain, inf->mcu_advance, inf->warp, inf->warpl, inf->warpu ); - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, - flags); + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); } else if ( cmd->direction == SCHED_INFO_GET ) { struct fbvt_dom_info *inf = FBVT_INFO(p); - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags); params->mcu_adv = inf->mcu_advance; params->warp = inf->warp; params->warpl = inf->warpl; params->warpu = inf->warpu; - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, - flags); + spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags); } return 0; @@ -272,6 +378,7 @@ int fbvt_adjdom(struct domain *p, */ static task_slice_t fbvt_do_schedule(s_time_t now) { + unsigned long flags; struct domain *prev = current, *next = NULL, *next_prime, *p; struct list_head *tmp; int cpu = prev->processor; @@ -288,6 +395,10 @@ static task_slice_t fbvt_do_schedule(s_time_t now) ASSERT(prev->sched_priv != NULL); ASSERT(prev_inf != NULL); + + spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags); + + ASSERT(__task_on_runqueue(RUNLIST(prev))); if ( likely(!is_idle_task(prev)) ) { @@ -365,6 +476,8 @@ static task_slice_t fbvt_do_schedule(s_time_t now) min_avt = p_inf->avt; } + spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); + /* Extract the domain pointers from the dom infos */ next = next_inf->domain; next_prime = next_prime_inf->domain; @@ -475,7 +588,7 @@ static void fbvt_dump_cpu_state(int i) struct fbvt_dom_info *d_inf; struct domain *d; - spin_lock_irqsave(&schedule_data[i].schedule_lock, flags); + spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags); printk("svt=0x%08lX ", CPU_SVT(i)); queue = RUNQUEUE(i); @@ -494,7 +607,7 @@ static void fbvt_dump_cpu_state(int i) (unsigned long)list, (unsigned long)list->next, (unsigned long)list->prev); } - spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags); + spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags); } @@ -519,13 +632,16 @@ int fbvt_init_scheduler() for ( i = 0; i < NR_CPUS; i++ ) { schedule_data[i].sched_priv = xmalloc(sizeof(struct fbvt_cpu_info)); - INIT_LIST_HEAD(RUNQUEUE(i)); + if ( schedule_data[i].sched_priv == NULL ) { printk("Failed to allocate FBVT scheduler per-CPU memory!\n"); return -1; } + INIT_LIST_HEAD(RUNQUEUE(i)); + spin_lock_init(&CPU_INFO(i)->run_lock); + CPU_SVT(i) = 0; /* XXX do I really need to do this? */ } @@ -541,86 +657,7 @@ int fbvt_init_scheduler() return 0; } - -static void fbvt_sleep(struct domain *d) -{ - if ( test_bit(DF_RUNNING, &d->flags) ) - cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); - else if ( __task_on_runqueue(RUNLIST(d)) ) - __del_from_runqueue(RUNLIST(d)); -} - -static void fbvt_wake(struct domain *d) -{ - struct fbvt_dom_info *inf = FBVT_INFO(d); - struct domain *curr; - s_time_t now, min_time; - int cpu = d->processor; - s32 io_warp; - - /* If on the runqueue already then someone has done the wakeup work. */ - if ( unlikely(__task_on_runqueue(RUNLIST(d))) ) - return; - __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu)); - now = NOW(); - -#if 0 - /* - * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing - * to do, in light of the stuff that fbvt_wake_up() does. - * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test - * 'inf->avt < CPU_SVT(cpu)' redundant! - */ - if ( d->domain == IDLE_DOMAIN_ID ) - { - inf->avt = inf->evt = ~0U; - } - else - { - /* Set avt to system virtual time. */ - inf->avt = CPU_SVT(cpu); - /* Set some default values here. */ - LAST_VTB(cpu) = 0; - __calc_evt(inf); - } -#endif - - /* Set the BVT parameters. */ - if ( inf->avt < CPU_SVT(cpu) ) - { - /* - * We want IO bound processes to gain dispatch precedence. It is - * especially for device driver domains. Therefore AVT - * not be updated to SVT but to a value marginally smaller. - * Since frequently sleeping domains have high time_slept - * values, the virtual time can be determined as: - * SVT - const * TIME_SLEPT - */ - io_warp = (int)(0.5 * inf->time_slept); - if ( io_warp > 1000 ) - io_warp = 1000; - - ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp); - inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp; - inf->avt = CPU_SVT(cpu) - io_warp; - } - - /* Deal with warping here. */ - inf->warpback = 1; - inf->warped = now; - __calc_evt(inf); - - curr = schedule_data[cpu].curr; - - /* Currently-running domain should run at least for ctx_allow. */ - min_time = curr->lastschd + curr->min_slice; - - if ( is_idle_task(curr) || (min_time <= now) ) - cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); - else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) ) - mod_ac_timer(&schedule_data[cpu].s_timer, min_time); -} struct scheduler sched_fbvt_def = { .name = "Fair Borrowed Virtual Time", diff --git a/xen/common/sched_rrobin.c b/xen/common/sched_rrobin.c index 49daaa05d2..b5ab6000e5 100644 --- a/xen/common/sched_rrobin.c +++ b/xen/common/sched_rrobin.c @@ -23,6 +23,8 @@ struct rrobin_dom_info struct domain *domain; }; +static spinlock_t run_locks[NR_CPUS]; + #define RR_INFO(d) ((struct rrobin_dom_info *)d->sched_priv) #define RUNLIST(d) (struct list_head *)&(RR_INFO(d)->run_list) #define RUNQUEUE(cpu) RUNLIST(schedule_data[cpu].idle) @@ -49,7 +51,10 @@ static int rr_init_scheduler() int i; for ( i = 0; i < NR_CPUS; i++ ) + { INIT_LIST_HEAD(RUNQUEUE(i)); + spin_lock_init(&run_locks[i]); + } dom_info_cache = xmem_cache_create("FBVT dom info", sizeof(struct rrobin_dom_info), @@ -95,11 +100,11 @@ static int rr_init_idle_task(struct domain *p) if(rr_alloc_task(p) < 0) return -1; rr_add_task(p); - spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags); + spin_lock_irqsave(&run_locks[p->processor], flags); set_bit(DF_RUNNING, &p->flags); if ( !__task_on_runqueue(RUNLIST(p)) ) __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor)); - spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags); + spin_unlock_irqrestore(&run_locks[p->processor], flags); return 0; } @@ -107,11 +112,14 @@ static int rr_init_idle_task(struct domain *p) /* Main scheduling function */ static task_slice_t rr_do_schedule(s_time_t now) { + unsigned long flags; struct domain *prev = current; int cpu = current->processor; task_slice_t ret; - + + spin_lock_irqsave(&run_locks[cpu], flags); + if(!is_idle_task(prev)) { __del_from_runqueue(RUNLIST(prev)); @@ -120,6 +128,8 @@ static task_slice_t rr_do_schedule(s_time_t now) __add_to_runqueue_tail(RUNLIST(prev), RUNQUEUE(cpu)); } + spin_unlock_irqrestore(&run_locks[cpu], flags); + ret.task = list_entry( RUNQUEUE(cpu).next->next, struct rrobin_dom_info, run_list)->domain; @@ -149,27 +159,44 @@ static void rr_dump_settings() static void rr_sleep(struct domain *d) { + unsigned long flags; + if ( test_bit(DF_RUNNING, &d->flags) ) cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ); - else if ( __task_on_runqueue(RUNLIST(d)) ) - __del_from_runqueue(RUNLIST(d)); + else + { + spin_lock_irqsave(&run_locks[d->processor], flags); + if ( __task_on_runqueue(RUNLIST(d)) ) + __del_from_runqueue(RUNLIST(d)); + spin_unlock_irqrestore(&run_locks[d->processor], flags); + } } void rr_wake(struct domain *d) { + unsigned long flags; struct domain *curr; - s_time_t now, min_time; - int cpu = d->processor; + s_time_t now, min_time; + int cpu = d->processor; + spin_lock_irqsave(&run_locks[cpu], flags); + /* If on the runqueue already then someone has done the wakeup work. */ if ( unlikely(__task_on_runqueue(RUNLIST(d)))) + { + spin_unlock_irqrestore(&run_locks[cpu], flags); return; + } __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu)); + spin_unlock_irqrestore(&run_locks[cpu], flags); + now = NOW(); + spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); curr = schedule_data[cpu].curr; - + spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); + /* Currently-running domain should run at least for ctx_allow. */ min_time = curr->lastschd + curr->min_slice; @@ -194,7 +221,7 @@ static void rr_dump_cpu_state(int i) int loop = 0; struct rrobin_dom_info *d_inf; - spin_lock_irqsave(&schedule_data[i].schedule_lock, flags); + spin_lock_irqsave(&run_locks[i], flags); queue = RUNQUEUE(i); printk("QUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue, @@ -210,7 +237,7 @@ static void rr_dump_cpu_state(int i) d_inf = list_entry(list, struct rrobin_dom_info, run_list); rr_dump_domain(d_inf->domain); } - spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags); + spin_unlock_irqrestore(&run_locks[i], flags); } diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 4a8a1b0612..7b12f8f5ab 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -164,13 +164,13 @@ void init_idle_task(void) void domain_sleep(struct domain *d) { unsigned long flags; - int cpu = d->processor; - spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + /* sleep and wake protected by domain's state_lock */ + spin_lock_irqsave(&d->state_lock, flags); if ( likely(!domain_runnable(d)) ) SCHED_OP(sleep, d); - spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); - + spin_unlock_irqrestore(&d->state_lock, flags); + /* Synchronous. */ while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) ) { @@ -182,8 +182,9 @@ void domain_sleep(struct domain *d) void domain_wake(struct domain *d) { unsigned long flags; - int cpu = d->processor; - spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags); + + spin_lock_irqsave(&d->state_lock, flags); + if ( likely(domain_runnable(d)) ) { TRACE_2D(TRC_SCHED_WAKE, d->domain, d); @@ -192,7 +193,8 @@ void domain_wake(struct domain *d) d->wokenup = NOW(); #endif } - spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags); + + spin_unlock_irqrestore(&d->state_lock, flags); } /* Block the currently-executing domain until a pertinent event occurs. */ @@ -323,9 +325,9 @@ void __enter_scheduler(void) s32 r_time; /* time for new dom to run */ perfc_incrc(sched_run); - + spin_lock_irq(&schedule_data[cpu].schedule_lock); - + now = NOW(); rem_ac_timer(&schedule_data[cpu].s_timer); @@ -349,9 +351,9 @@ void __enter_scheduler(void) r_time = next_slice.time; next = next_slice.task; - + schedule_data[cpu].curr = next; - + next->lastschd = now; /* reprogramm the timer */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 7838d1feff..7f67e9ee48 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -101,6 +101,7 @@ struct domain /* Scheduling. */ int shutdown_code; /* code value from OS (if DF_SHUTDOWN). */ + spinlock_t state_lock; /* wake/sleep lock */ s_time_t lastschd; /* time this domain was last scheduled */ s_time_t lastdeschd; /* time this domain was last descheduled */ s_time_t cpu_time; /* total CPU time received till now */ -- 2.30.2